/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection

Created on: 12/28/2022
Last Modified on: 2/17/2024

Description: This program generates the primary investigator-level placement and outcome
rates, as well as standard errors for these estimates.

The program takes as an input (i) a child by investigation level dataset spanning 
January 2008 to December 2019, subject to the sample restrictions discussed in the paper,
and (ii) a dataset containing various other "omitted payoffs," which we use to probe the 
robustness of our main proxy for child maltreatment in the paper. 
 
Using these child by investigation level datasets, the program then estimates
investigator-specific rates and standard errors using a linear adjustment to 
account for randomization strata, which we discuss in the paper.  

Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/

**************************
**(0) SETUP
**************************
* Start from a clean session: no data in memory, no paging of output,
* no leftover global macros, and no open log file.
clear
set more off
* "_all" is the wildcard that removes every global macro. The bare word
* "all" would only target a macro literally named "all", leaving any stale
* globals from the calling session in place.
macro drop _all
capture log close
* Fix the random-number seed so any randomised step is reproducible.
set seed 02042023

*Set directories 
* The directory paths were removed for confidentiality. Fill each one in
* (with a trailing path separator, since file names are appended directly
* to these globals below) before running the program.
global cleand
global tmpd 
global output 

**************************
**(1) USING THE MAIN ANALYSIS SAMPLE FROM 2008 TO 2019, COMPUTE RATES AND STANDARD ERRORS 
**************************

***********
**(1A) COMPUTE INVESTIGATOR RATES
***********
* Load the child-by-investigation analysis sample and attach the alternative
* ("omitted payoff") outcome measures. keep(1 3) retains every observation of
* the analysis sample, whether or not it is matched in the payoff file.
use "${cleand}analysis_sample_investigators_qje.dta", clear 
cap drop _merge 
merge 1:1 vicid inv_caseid using "${tmpd}inv_omitted_payoffs_qje.dta", keepus(inv6m_sub inv6m_fc inv6m_phyab inv6m_neglect  ///
inv6m_phyab_sub inv6m_neglect_sub) keep(1 3)

* Restrict to the variables used below (this also discards _merge).
keep worker_id rotationgroup pre_black fc inv6m inv6m_sub inv6m_fc inv6m_phyab inv6m_neglect  ///
inv6m_phyab_sub inv6m_neglect_sub count_inv bshare zipcode_vic cps_year vicid  

* Shorter outcome names, used to build the base_*, b_* and w_* variables later.
rename (inv6m_phyab inv6m_neglect inv6m_phyab_sub inv6m_neglect_sub) (inv6m_p inv6m_n inv6m_p_sub inv6m_n_sub)

* Race indicators. white is 1 only when black is exactly 0, so an observation
* with missing black has white = 0 as well.
rename pre_black black 
gen white = black==0 

**Estimate investigator placement and subsequent maltreatment rates
* nofc = 1 when the child was not placed in foster care (fc equals 0).
gen nofc=fc==0

* Consecutive integer ids for investigators (usable as factor variables) and
* for zip code-by-year cells (absorbed as fixed effects in every regression).
egen grpworker_id = group(worker_id)
egen cell=group(zipcode_vic cps_year)

* Store the list of investigator ids in the local macro "levels", which every
* loop below iterates over. The local() option copies the list straight into
* the macro; assigning it with  = "..."  would evaluate it as a string
* expression, which truncates long lists on older Stata versions.
levelsof grpworker_id, local(levels)


* Work on a copy: the child-level data are restored once the investigator-level
* rates file has been written.
preserve 

foreach var in nofc inv6m inv6m_sub inv6m_fc inv6m_p inv6m_n inv6m_p_sub inv6m_n_sub {

	* Estimation sample. Placement (nofc) uses every observation; all other
	* outcomes are selectively observed, so they are estimated only on
	* children left at home.
	local smpl
	if "`var'" != "nofc" {
		local smpl "if nofc == 1"
	}

	*----- Pooled investigator rates -----*
	qui: reghdfe `var' i.grpworker_id `smpl', resid absorb(cell)

	* Mean of the absorbed cell effect: full prediction minus the part
	* explained by the investigator dummies, averaged over observations.
	predict fit_full, xbd
	predict fit_lin, xb
	gen fe_part = fit_full - fit_lin
	sum fe_part
	local fe_mean = r(mean)

	gen base_`var' = .

	* One linear combination per investigator. lincom is wrapped in capture,
	* so an investigator whose coefficient is unavailable keeps a missing rate.
	qui {
		foreach w of local levels {
			capture: lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace base_`var' = r(estimate) if grpworker_id == `w'
			}
		}
	}
	drop fit_full fit_lin fe_part

	*----- Race-specific investigator rates -----*
	* The outcome copy is non-missing only for observations flagged black or
	* white; the interaction lets each investigator's rate differ by race.
	qui {
		gen y_bw = `var' if white == 1 | black == 1
		reghdfe y_bw i.grpworker_id i.grpworker_id#i.black `smpl', resid absorb(cell)
	}

	* Mean of the absorbed cell effect for this specification.
	predict fit_full, xbd
	predict fit_lin, xb
	gen fe_part = fit_full - fit_lin
	sum fe_part
	local fe_mean = r(mean)

	gen b_`var' = .
	gen w_`var' = .

	* White rate: constant + investigator effect + mean cell effect.
	* Black rate: the same plus the investigator-by-black interaction.
	qui {
		foreach w of local levels {
			capture: lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace w_`var' = r(estimate) if grpworker_id == `w'
			}
			capture: lincom _cons + _b[`w'.grpworker_id] + _b[`w'.grpworker_id#1.black] + `fe_mean'
			if _rc == 0 {
				replace b_`var' = r(estimate) if grpworker_id == `w'
			}
		}
	}
	drop y_bw fit_full fit_lin fe_part
}

*----------------------------------------------------------------------*
* Collapse to one row per investigator and save
*----------------------------------------------------------------------*
* Caseload counts per investigator: all observations, black, and white.
cap drop count_inv 
bysort worker_id: gen count_inv = _N
bysort worker_id: egen count_black = total(black)
bysort worker_id: egen count_white = total(white)

* Keep the estimated rates, the ids and the counts; one row per investigator.
keep base_* b_* w_* worker_id count_inv count_black count_white grpworker_id
duplicates drop worker_id, force 	
drop if worker_id == .

save "${cleand}inv_adjusted_rates_omitted_qje.dta", replace
restore 

***********
**(1B) COMPUTE STANDARD ERRORS FOR INVESTIGATOR RATES
***********
* Work on a copy of the child-level data; it is restored after saving.
preserve

foreach var in nofc inv6m inv6m_sub inv6m_fc inv6m_p inv6m_n inv6m_p_sub inv6m_n_sub {

	* Estimation sample. Placement (nofc) uses every observation; the other
	* outcomes are estimated only on children not placed in foster care.
	local smpl
	if "`var'" != "nofc" {
		local smpl "if nofc==1"
	}

	* Race-specific specification with reghdfe's default (unclustered)
	* variance estimator. The outcome copy is non-missing only for
	* observations flagged black or white.
	qui {
		gen y_bw = `var' if white == 1 | black == 1
		reghdfe y_bw i.grpworker_id i.grpworker_id#i.black `smpl', resid absorb(cell)
	}

	* Mean of the absorbed cell effect: full prediction minus the part
	* explained by the investigator terms, averaged over observations.
	predict fit_full, xbd
	predict fit_lin, xb
	gen fe_part = fit_full - fit_lin
	sum fe_part
	local fe_mean = r(mean)

	gen b_se_`var' = .
	gen w_se_`var' = .

	* Standard error of each investigator's white and black rate. lincom is
	* wrapped in capture, so an investigator whose coefficients are
	* unavailable keeps a missing standard error.
	qui {
		foreach w of local levels {
			capture: lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace w_se_`var' = r(se) if grpworker_id == `w'
			}
			capture: lincom _cons + _b[`w'.grpworker_id] + _b[`w'.grpworker_id#1.black] + `fe_mean'
			if _rc == 0 {
				replace b_se_`var' = r(se) if grpworker_id == `w'
			}
		}
	}
	drop y_bw fit_full fit_lin fe_part
}

*----------------------------------------------------------------------*
* Collapse to one row per investigator and save
*----------------------------------------------------------------------*
* Only the standard errors and the investigator id are kept.
keep b_* w_* worker_id
duplicates drop worker_id, force 	
drop if worker_id == .

save "${cleand}inv_adjusted_se_omitted_qje.dta", replace
restore 


***********
**(1C) COMPUTE STANDARD ERRORS FOR INVESTIGATOR RATES, CLUSTERED BY CHILD AND INVESTIGATOR
***********
* Work on a copy of the child-level data; it is restored after saving.
preserve

foreach var in nofc inv6m inv6m_sub inv6m_fc inv6m_p inv6m_n inv6m_p_sub inv6m_n_sub {

	* Estimation sample. Placement (nofc) uses every observation; the other
	* outcomes are estimated only on children not placed in foster care.
	local smpl
	if "`var'" != "nofc" {
		local smpl "if nofc==1"
	}

	* Race-specific specification with standard errors clustered two ways,
	* on child (vicid) and investigator (grpworker_id). The outcome copy is
	* non-missing only for observations flagged black or white.
	qui {
		gen y_bw = `var' if white == 1 | black == 1
		reghdfe y_bw i.grpworker_id i.grpworker_id#i.black `smpl', resid absorb(cell) cluster(vicid grpworker_id)
	}

	* Mean of the absorbed cell effect: full prediction minus the part
	* explained by the investigator terms, averaged over observations.
	predict fit_full, xbd
	predict fit_lin, xb
	gen fe_part = fit_full - fit_lin
	sum fe_part
	local fe_mean = r(mean)

	gen b_twse_`var' = .
	gen w_twse_`var' = .

	* Clustered standard error of each investigator's white and black rate.
	* lincom is wrapped in capture, so an investigator whose coefficients
	* are unavailable keeps a missing standard error.
	qui {
		foreach w of local levels {
			capture: lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace w_twse_`var' = r(se) if grpworker_id == `w'
			}
			capture: lincom _cons + _b[`w'.grpworker_id] + _b[`w'.grpworker_id#1.black] + `fe_mean'
			if _rc == 0 {
				replace b_twse_`var' = r(se) if grpworker_id == `w'
			}
		}
	}
	drop y_bw fit_full fit_lin fe_part
}

*----------------------------------------------------------------------*
* Collapse to one row per investigator and save
*----------------------------------------------------------------------*
* Only the standard errors and the investigator id are kept.
keep b_* w_* worker_id
duplicates drop worker_id, force 	
drop if worker_id == .

save "${cleand}inv_adjusted_twse_omitted_qje.dta", replace
restore 


***********
**(1D) COMPUTE STANDARD ERRORS FOR INVESTIGATOR RATES, CLUSTERED BY CHILD AND ROTATION
***********
* Work on a copy of the child-level data; it is restored after saving.
preserve 

foreach var in nofc inv6m inv6m_sub inv6m_fc inv6m_p inv6m_n inv6m_p_sub inv6m_n_sub {

	* Estimation sample. Placement (nofc) uses every observation; the other
	* outcomes are estimated only on children not placed in foster care.
	local smpl
	if "`var'" != "nofc" {
		local smpl "if nofc==1"
	}

	* Race-specific specification with standard errors clustered two ways,
	* on child (vicid) and on cell (the zip code-by-year group).
	* NOTE(review): the section title says "rotation", but the cluster
	* variable is cell, not rotationgroup - confirm this is intended.
	qui {
		gen y_bw = `var' if white == 1 | black == 1
		reghdfe y_bw i.grpworker_id i.grpworker_id#i.black `smpl', resid absorb(cell) cluster(vicid cell)
	}

	* Mean of the absorbed cell effect: full prediction minus the part
	* explained by the investigator terms, averaged over observations.
	predict fit_full, xbd
	predict fit_lin, xb
	gen fe_part = fit_full - fit_lin
	sum fe_part
	local fe_mean = r(mean)

	gen b_twrse_`var' = .
	gen w_twrse_`var' = .

	* Clustered standard error of each investigator's white and black rate.
	* lincom is wrapped in capture, so an investigator whose coefficients
	* are unavailable keeps a missing standard error.
	qui {
		foreach w of local levels {
			capture: lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace w_twrse_`var' = r(se) if grpworker_id == `w'
			}
			capture: lincom _cons + _b[`w'.grpworker_id] + _b[`w'.grpworker_id#1.black] + `fe_mean'
			if _rc == 0 {
				replace b_twrse_`var' = r(se) if grpworker_id == `w'
			}
		}
	}
	drop y_bw fit_full fit_lin fe_part
}

*----------------------------------------------------------------------*
* Collapse to one row per investigator and save
*----------------------------------------------------------------------*
* Only the standard errors and the investigator id are kept.
keep b_* w_* worker_id
duplicates drop worker_id, force 	
drop if worker_id == .

save "${cleand}inv_adjusted_twrse_omitted_qje.dta", replace
restore 


***********
**(2) MERGE DATASETS AND SAVE FINAL INVESTIGATOR-LEVEL DATASET
***********
* Start from the investigator-level rates and attach, one file at a time, the
* conventional and the two sets of two-way clustered standard errors. _merge
* is dropped before each merge so the next merge can recreate it.
use "${cleand}inv_adjusted_rates_omitted_qje.dta", clear 
cap drop _merge 
merge 1:1 worker_id using "${cleand}inv_adjusted_se_omitted_qje.dta", keepus(b_se_* w_se_*)
cap drop _merge 
merge 1:1 worker_id using "${cleand}inv_adjusted_twse_omitted_qje.dta", keepus(b_twse_* w_twse_*)
cap drop _merge 
merge 1:1 worker_id using "${cleand}inv_adjusted_twrse_omitted_qje.dta", keepus(b_twrse_* w_twrse_*)

*Gen additional variables needed for the main analysis 
* share_black: fraction of each investigator's observations that are black.
* bshare: caseload-weighted mean of that share (constant across rows).
* NOTE(review): wtmean() is not an official egen function; presumably it comes
* from the user-written _gwtmean package - confirm it is installed.
gen share_black = count_black/count_inv 
egen bshare = wtmean(share_black), weight(count_inv)
	
	* Bound the main placement and outcome rates to the unit interval.
	* Stata treats missing values as larger than any number, so the upper
	* bound must exclude them explicitly; otherwise an investigator whose
	* rate could not be estimated would silently be assigned a rate of 1.
	foreach x in w_nofc b_nofc w_inv6m b_inv6m {
		replace `x'=0 if `x'<0 
		replace `x'=1 if `x'>1 & !missing(`x')
	}

save "${cleand}inv_adjusted_rates_se_omitted_qje.dta", replace 
